package com.core.family.spider.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
 * @Classname: JsoupExample
 * @CreateTime: 2024-04-22  17:25
 * @Created by: HL
 * @Description: Demo that fetches a web page with jsoup and prints its title and the text and URL of every link.
 * @Version: v1.0
 */
public class JsoupExample {
    /** Page fetched by this demo. */
    private static final String TARGET_URL =
            "https://github.com/yudaocode/SpringBoot-Labs#spring-cloud-alibaba-%E4%B8%93%E6%A0%8F";

    /**
     * Fetches {@link #TARGET_URL} with an HTTP GET, then prints the page title, a few
     * optional metadata fields (license, project labels, language summary) and the text
     * and {@code href} of every anchor that has an {@code href} attribute.
     *
     * <p>The metadata elements are looked up by id / CSS class and may be absent from the
     * fetched page; a missing element is reported as an empty string instead of failing.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        try {
            // Send the HTTP GET request and parse the response body.
            Document document = Jsoup.connect(TARGET_URL).get();

            String title = document.title();

            // getElementById / selectFirst return null when nothing matches, so go through
            // the null-safe helper rather than calling text() on the result directly.
            String license = textOrEmpty(document.getElementById("license-popup"));
            String mlanguage = textOrEmpty(document.selectFirst(".summary-languages"));

            // Join every label's text, each one followed by ';'.
            StringBuilder projectLabel = new StringBuilder();
            for (Element et : document.select(".project-label-item")) {
                projectLabel.append(et.text()).append(";");
            }
            String label = projectLabel.toString();

            System.out.println("网页标题：" + title);
            System.out.println("许可证：" + license);
            System.out.println("项目标签：" + label);
            System.out.println("主要语言：" + mlanguage);

            // Collect every anchor that carries an href attribute.
            Elements links = document.select("a[href]");
            System.out.println("链接数量：" + links.size());

            // Print each link's text and its href value exactly as written in the page.
            for (Element link : links) {
                String linkText = link.text();
                String linkUrl = link.attr("href");
                System.out.println("链接文本：" + linkText);
                System.out.println("链接URL：" + linkUrl);
            }
        } catch (IOException e) {
            // Name the URL so the failure is understandable without reading the trace.
            System.err.println("Failed to fetch " + TARGET_URL + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Returns the text of the given element, or an empty string when the element is
     * {@code null} (i.e. the lookup that produced it matched nothing).
     *
     * @param element the element to read, possibly {@code null}
     * @return the element's text, or {@code ""} if {@code element} is {@code null}
     */
    private static String textOrEmpty(Element element) {
        return element == null ? "" : element.text();
    }
}